% Conference paper record. The non-standard fields (affiliation,
% conference-location, conference-year, ibi, targetfile, urlaccessdate) are
% ignored by standard styles and are kept as exported.
% The DOI resolver link is not repeated in url because the doi field already
% carries it; url points at the repository copy identified by ibi.
% Name particles ("de") are written in "von Last, First" form so that
% abbreviating styles do not treat them as a given-name initial.
@inproceedings{MeloSaCaSoPeSc:2018:ObTeSe,
  author              = {de Melo, Victor Hugo Cunha and
                         Santos, Jesimon Barreto and
                         Caetano J{\'u}nior, Carlos Ant{\^o}nio and
                         de Souza, J{\'e}ssica Sena and
                         Penatti, Ot{\'a}vio Augusto Bizetto and
                         Schwartz, William Robson},
  affiliation         = {Smart Sense Laboratory, Universidade Federal de Minas Gerais and
                         Smart Sense Laboratory, Universidade Federal de Minas Gerais and
                         Smart Sense Laboratory, Universidade Federal de Minas Gerais and
                         Smart Sense Laboratory, Universidade Federal de Minas Gerais and
                         Advanced Technologies, Samsung Research Institute and
                         Smart Sense Laboratory, Universidade Federal de Minas Gerais},
  title               = {Object-based Temporal Segment Relational Network for Activity
                         Recognition},
  booktitle           = {Proceedings of the 31st Conference on Graphics, Patterns and
                         Images ({SIBGRAPI})},
  year                = {2018},
  editor              = {Ross, Arun and
                         Gastal, Eduardo S. L. and
                         Jorge, Joaquim A. and
                         de Queiroz, Ricardo L. and
                         Minetto, Rodrigo and
                         Sarkar, Sudeep and
                         Papa, Jo{\~a}o Paulo and
                         Oliveira, Manuel M. and
                         Arbel{\'a}ez, Pablo and
                         Mery, Domingo and
                         de Oliveira, Maria Cristina Ferreira and
                         Spina, Thiago Vallin and
                         Mendes, Caroline Mazetto and
                         Costa, Henrique S{\'e}rgio Gutierrez and
                         Mejail, Marta Estela and
                         de Geus, Klaus and
                         Scheer, Sergio},
  publisher           = {IEEE Computer Society},
  address             = {Los Alamitos},
  keywords            = {Action recognition, contextual cues, relational reasoning},
  abstract            = {Video understanding is the next frontier of computer vision, in
                         which activity recognition plays a major role. Despite the recent
                         improvements in holistic activity recognition, further researching
                         part-based models such as context may allow us to better
                         understand what is important for activities and thus improve our
                         current activity recognition models. This work tackles contextual
                         cues obtained from object detections, in which we posit that
                         objects relevant to an action are related to its spatial
                         arrangement regarding an agent. Based on that, we propose
                         Egocentric Pyramid to encode such spatial relationships. We
                         further extend it by proposing a data-centric approach named
                         Temporal Segment Relational Network (TSRN). Our experiments give
                         support to the hypothesis that object spatiality provides an
                         important clue to activity recognition. In addition, our
                         data-centric approach shows that besides such spatial features,
                         there may be other important information that further enhances the
                         object-based activity recognition, such as co-occurrence, relative
                         size, and temporal information.},
  conference-location = {Foz do Igua{\c{c}}u, PR, Brazil},
  conference-year     = {29 Oct.-1 Nov. 2018},
  doi                 = {10.1109/SIBGRAPI.2018.00020},
  language            = {en},
  ibi                 = {8JMKD3MGPAW/3RPBTD2},
  url                 = {http://urlib.net/ibi/8JMKD3MGPAW/3RPBTD2},
  targetfile          = {Paper ID 98.pdf},
  urlaccessdate       = {2024, May 03},
}